import requests
from lxml import etree
import os

# Work from the folder where the output file should be written.
os.chdir(r"E:\作业")

# Browser-like User-Agent so the site does not reject the requests.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'}

# Site root; listing pages live under new/<page>.html relative to it.
url = 'https://xiaohua.zol.com.cn/'

# Output file for the scraped jokes. An explicit encoding keeps writes from
# depending on the platform default (e.g. cp936 on Chinese Windows), which
# previously raised UnicodeEncodeError on characters outside that codec.
f = open("joke.txt", "w", encoding="utf-8")


def page_list(page_url):
    """Fetch one listing page and return the relative URLs of its detail pages.

    Each <li> in the article list carries an anchor with class "all-read"
    whose href points at the full-text page for that joke.
    """
    response = requests.get(page_url, headers=header)
    tree = etree.HTML(response.text)
    return tree.xpath("//ul[@class='article-list']/li//a[@class='all-read']/@href")

# Fetch one detail page and append its title and body text to the output file.
def getPageDetail(detail_rul):
    """Download a joke-detail page and write its title and text to `f`.

    Falls back to printing the content to stdout when the page lacks the
    expected <h1> or the text cannot be written to the file.
    """
    resp_d = requests.get(detail_rul, headers=header)
    html_d = etree.HTML(resp_d.text)
    title = html_d.xpath("//div[@class='section article']//h1/text()")
    content_list = html_d.xpath("//div[@class='section article']//div[@class='article-text']//text()")
    # join() avoids the quadratic cost of repeated string concatenation.
    all_content = "".join(content_list)
    try:
        f.write(title[0])
        f.write(all_content)
    except (IndexError, UnicodeEncodeError, OSError):
        # IndexError: page layout changed and no <h1> was found.
        # UnicodeEncodeError/OSError: the file write failed — best-effort
        # fallback is to dump the content to stdout instead of crashing.
        # (Narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit and hid real bugs.)
        print(all_content)


def main():
    """Crawl listing pages 1-10 and save every joke found on them.

    The original body had a stray unconditional `break` (debug leftover)
    that made the range(1, 11) loop fetch only page 1; it has been removed
    so all ten pages are crawled as the loop bounds intend.
    """
    try:
        for page in range(1, 11):
            # Listing pages are located at <root>/new/<n>.html
            result = page_list(url + "new/" + str(page) + ".html")
            for detail in result:
                print(detail)
                getPageDetail(url + detail)
    finally:
        # Close the module-level output file so buffered jokes are flushed
        # to disk even if a request raises mid-crawl.
        f.close()


# Run the crawler only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
